* ---- Session setup -------------------------------------------------------
* Interpret the commands below with Stata 8.2 semantics.
version 8.2
set more off

* Close any open log and empty memory; -capture- swallows the error that
* would otherwise be raised when there is nothing to close.
capture log close
capture clear

* Memory / matrix-size settings (must be issued with no data in memory).
set memory 1000m
set matsize 800


* ---- Load the raw firm-level file ----------------------------------------
use "New_Comprehensive_July_5_2016_Version11.dta"
  
/*
*Description of country year:  
keep country
sort country
keep if country!=country[_n-1]
*/


* The string variable country is split into two pieces:
*   countryname = everything except the last four characters
*   year        = the last four characters converted to a number
* NOTE(review): this presumes every value of country ends in a 4-digit
* survey year; real() returns missing when the suffix is not numeric.
gen countryname = substr(country, 1, length(country) - 4)
gen year = real(substr(country, -4, .))

*browse countryname year


****Harmonize country names:
* Each -replace- below rewrites one spelling of countryname to another
* spelling; the continent assignment further down compares against the
* spellings produced here, so the two sections must be kept in sync.
* A few country-year surveys are dropped along the way (see the notes
* next to each -drop-). Order matters: a -drop- that uses a new spelling
* must come after the -replace- that creates it.
replace countryname="Antigua and Barbuda" if countryname=="Antiguaandbarbuda"
replace countryname="Bahamas, The" if countryname=="Bahamas"
* Bulgaria has 3 surveys; the 2009 one is dropped:
drop if countryname=="Bulgaria" & year==2009
replace countryname="Burkina Faso" if countryname=="BurkinaFaso" 
replace countryname="Cabo Verde" if countryname=="CapeVerde" 
replace countryname="Central African Republic" if countryname=="Centralafricanrepublic" 
replace countryname="Congo, Rep." if countryname=="Congo" 
replace countryname="Congo, Dem. Rep." if countryname=="DRC" 
* Congo, Dem. Rep. has 3 surveys; the 2010 one is dropped
* (uses the new spelling, so it must follow the rename just above):
drop if countryname=="Congo, Dem. Rep." & year==2010
replace countryname="Costa Rica" if countryname=="Costarica" 
* The source spelling contains non-ASCII characters (o-circumflex and a
* typographic apostrophe); the target uses plain ASCII.
replace countryname="Cote d'Ivoire" if countryname=="Côte d’Ivoire" 
replace countryname="Dominican Republic" if countryname=="DominicanRepublic" 
replace countryname="Egypt, Arab Rep." if countryname=="Egypt" 
* Two different raw spellings of El Salvador are mapped to one name:
replace countryname="El Salvador" if countryname=="ElSalvador" | countryname=="Elsalvador" 
replace countryname="Gambia, The" if countryname=="Gambia" 
replace countryname="Guinea-Bissau" if countryname=="GuineaBissau" 
replace countryname="Lao PDR" if countryname=="LaoPDR" 
* Lao PDR has 2 candidate endline surveys; the 2012 one is dropped:
drop if countryname=="Lao PDR"&year==2012
replace countryname="Macedonia, FYR" if countryname=="Fyr Macedonia" 
replace countryname="Micronesia, Fed. Sts." if countryname=="Micronesia" 
replace countryname="Papua New Guinea" if countryname=="PapuaNewGuinea" 
replace countryname="Russian Federation" if countryname=="Russia" 
replace countryname="South Africa" if countryname=="SouthAfrica" 
replace countryname="South Sudan" if countryname=="Southsudan" 
replace countryname="Sri Lanka" if countryname=="SriLanka" 
replace countryname="St. Kitts and Nevis" if countryname=="StKittsandNevis" 
replace countryname="St. Lucia" if countryname=="StLucia" 
replace countryname="St. Vincent and the Grenadines" if countryname=="StVincentandGrenadines" 
replace countryname="Timor-Leste" if countryname=="Timor Leste" 
replace countryname="Trinidad and Tobago" if countryname=="TrinidadandTobago" 
replace countryname="Venezuela, RB" if countryname=="Venezuela" 
replace countryname="West Bank and Gaza" if countryname=="West Bank And Gaza"
replace countryname="Yemen, Rep." if countryname=="Yemen"

***Generate a continent variable:
* continent is a string region label assigned by exact match on the
* harmonized countryname. Countries that match none of the lists below
* keep an empty continent ("").
* The spellings compared here must be the ones produced by the renaming
* section earlier in this file (e.g. "Yemen, Rep.", not "Yemen").

* Eastern Europe.
* NOTE(review): Sweden is grouped with Eastern_Europe here - confirm intended.
gen continent="Eastern_Europe" if countryname=="Albania"|countryname=="Armenia"|countryname=="Azerbaijan"|countryname=="Belarus" ///
|countryname=="Bosnia and Herzegovina"|countryname=="Bulgaria"|countryname=="Croatia"|countryname=="Czech Republic"|countryname=="Estonia" ///
|countryname=="Georgia"|countryname=="Hungary"|countryname=="Kosovo"|countryname=="Latvia"|countryname=="Lithuania"|countryname=="Macedonia, FYR" ///
|countryname=="Moldova"|countryname=="Montenegro"|countryname=="Poland"|countryname=="Romania"|countryname=="Russian Federation"|countryname=="Serbia" ///
|countryname=="Slovak Republic"|countryname=="Slovenia"|countryname=="Sweden"|countryname=="Ukraine"

* East Asia and Pacific, South Asia and Central Asia:
replace continent="Asia" if countryname=="Afghanistan" ///
|countryname=="Bangladesh"|countryname=="Bhutan"|countryname=="Cambodia"|countryname=="China"|countryname=="Fiji"|countryname=="India" ///
|countryname=="Indonesia"|countryname=="Kazakhstan"|countryname=="Kyrgyz Republic"|countryname=="Lao PDR"|countryname=="Malaysia"|countryname=="Micronesia, Fed. Sts." ///
|countryname=="Mongolia"|countryname=="Myanmar"|countryname=="Nepal"|countryname=="Pakistan"|countryname=="Papua New Guinea"|countryname=="Philippines" ///
|countryname=="Samoa"|countryname=="Solomon Islands"|countryname=="Sri Lanka"|countryname=="Tajikistan"|countryname=="Thailand"|countryname=="Timor-Leste" ///
|countryname=="Tonga"|countryname=="Uzbekistan"|countryname=="Vanuatu"|countryname=="Vietnam"

* Middle East and North Africa:
* Yemen is matched under its harmonized name "Yemen, Rep."; the raw name
* "Yemen" no longer exists at this point, so testing for it would leave
* Yemen without a continent.
replace continent="Middle_East" if countryname=="Egypt, Arab Rep." ///
|countryname=="Iraq"|countryname=="Israel"|countryname=="Jordan"|countryname=="Lebanon"|countryname=="Morocco"|countryname=="Tunisia" ///
|countryname=="Turkey"|countryname=="West Bank and Gaza"|countryname=="Yemen, Rep."

* Latin America, Central America, the Caribbean:
replace continent="Latin_America" if countryname=="Antigua and Barbuda"|countryname=="Argentina" ///
|countryname=="Bahamas, The"|countryname=="Barbados"|countryname=="Belize"|countryname=="Bolivia"|countryname=="Brazil" ///
|countryname=="Chile"|countryname=="Colombia"|countryname=="Costa Rica"|countryname=="Dominica"|countryname=="Dominican Republic" ///
|countryname=="Ecuador"|countryname=="El Salvador"|countryname=="Grenada"|countryname=="Guatemala"|countryname=="Guyana" ///
|countryname=="Honduras"|countryname=="Jamaica"|countryname=="Mexico"|countryname=="Nicaragua" ///
|countryname=="Panama"|countryname=="Paraguay"|countryname=="Peru"|countryname=="St. Kitts and Nevis"|countryname=="St. Lucia"|countryname=="St. Vincent and the Grenadines" ///
|countryname=="Suriname"|countryname=="Trinidad and Tobago"|countryname=="Uruguay"|countryname=="Venezuela, RB"

* Sub-Saharan Africa:
replace continent="Africa" if countryname=="Angola" ///
|countryname=="Benin"|countryname=="Botswana"|countryname=="Burkina Faso"|countryname=="Burundi"|countryname=="Cabo Verde" ///
|countryname=="Cameroon"|countryname=="Central African Republic"|countryname=="Chad"|countryname=="Congo, Dem. Rep."|countryname=="Congo, Rep." ///
|countryname=="Cote d'Ivoire"|countryname=="Djibouti"|countryname=="Eritrea"|countryname=="Ethiopia"|countryname=="Gabon"|countryname=="Gambia, The" ///
|countryname=="Ghana"|countryname=="Guinea"|countryname=="Guinea-Bissau"|countryname=="Kenya" ///
|countryname=="Lesotho"|countryname=="Liberia"|countryname=="Madagascar"|countryname=="Malawi"|countryname=="Mali"|countryname=="Mauritania" ///
|countryname=="Mauritius"|countryname=="Mozambique"|countryname=="Namibia"|countryname=="Niger"|countryname=="Nigeria"|countryname=="Rwanda" ///
|countryname=="Senegal"|countryname=="Sierra Leone"|countryname=="South Africa"|countryname=="South Sudan"|countryname=="Sudan"|countryname=="Swaziland" ///
|countryname=="Tanzania"|countryname=="Togo"|countryname=="Uganda"|countryname=="Zambia"|countryname=="Zimbabwe"


 

***Age of firm (b5):
* Show the raw distribution, then blank out negative values.
*browse b5
tabulate b5
replace b5 = . if b5 < 0


***Court perception, h7a (or j1): "the court system is fair, impartial, and uncorrupted"
* Where h7a is missing, fall back on j1.
replace h7a = j1 if h7a == .
summarize h7a
tabulate h7a
* Keep only answers strictly between 0 and 5; anything else (including
* every kind of missing, which sorts above all numbers) becomes ".".
replace h7a = . if !(h7a > 0 & h7a < 5)
summarize h7a
tabulate h7a
label variable h7a "The court system is fair, impartial and uncorrupted"


**Firm size (size_num): negative values are set to missing.
*codebook size_num
*browse size_num if size_num<0
replace size_num = . if size_num < 0

***Capital (n7a, n7b): negative values are set to missing.
foreach capvar of varlist n7a n7b {
	replace `capvar' = . if `capvar' < 0
}

* n7 is the row-wise mean of n7a and n7b (rowmean ignores missing
* components); n7_perwo scales it by firm size.
egen n7 = rowmean(n7a n7b)
gen n7_perwo = n7 / size_num


***Value added and profit
* Cost items n2a ... n2h: missing (.) or negative entries are recoded to 0
* so that they drop out of the subtractions below.
foreach item in a b c d e f g h {
	replace n2`item' = 0 if n2`item' < 0 | n2`item' == .
}

* d2: negative values are set to missing.
replace d2 = . if d2 < 0

* Text of the subtraction of n2b through n2h, shared by the formulas below.
local minus_inputs "-n2b-n2c-n2d-n2e-n2f-n2g-n2h"

* Value added = d2 minus n2b..n2h (n2a is not subtracted here).
gen va = d2`minus_inputs'
label variable va "Value added"
gen va_perwo = (d2`minus_inputs')/size_num
label variable va_perwo "Value added per worker"

* Profit additionally subtracts n2a.
gen profit = d2-n2a`minus_inputs'
label variable profit "Profit"

**Proportion of skilled workers:
* Negative values of l3a and l4a are set to missing before taking the ratio.
foreach workvar of varlist l3a l4a {
	replace `workvar' = . if `workvar' < 0
}

gen prop_skill = l4a / l3a

***For the triple difference:
* Sum of n2b through n2h expressed as a proportion of d2.
local input_sum "n2b+n2c+n2d+n2e+n2f+n2g+n2h"
gen input_output = (`input_sum')/d2
label variable input_output "Value inputs (as a prop of output)"

*Herfindahl index of complexity:
* herf = 1 - sum over the eight cost items of (item / total)^2.
* The index is built by starting at 1 and subtracting one squared share
* per loop iteration; a share_* variable is created for each item.
*browse n2a n2b n2c n2d n2e n2f n2g n2h
local cost_items n2a n2b n2c n2d n2e n2f n2g n2h
egen total_input_costs = rowtotal(`cost_items')
gen herf = 1
foreach item of local cost_items {
	gen share_`item' = `item' / total_input_costs
	replace herf = herf - (share_`item')^2
}

* Retain only the identifiers, controls and constructed outcomes.
keep countryname continent year d1a2 sector b5 b6b h7a size_num ///
	n7_perwo va_perwo profit va prop_skill input_output herf


***Dummy "post": 1 when the observation belongs to the latest survey year
***of its country, 0 otherwise.
* NOTE(review): a country with a single survey year gets post==1 here,
* whereas the country-year file built at the end of this script gives such
* a country post==0 - confirm which convention is intended.
sort countryname year
egen post_year = max(year), by(countryname)
gen post = (year == post_year)
drop post_year

save "Enterprise surveys_clean.dta", replace



*****Attach the ISIC rev 3.1 description of d1a2 to each firm
* Step 1: build a numeric lookup keyed on d1a2 from the ISIC code list.
use "ISIC rev 3.1.dta", clear
drop if code==""
* Section headers (single letters A to Q) have no numeric code. They are
* dropped rather than kept with a missing key: rows with a missing key
* would be matched to firms whose d1a2 is missing and hand them a section
* description that does not belong to them.
drop if code=="A"|code=="B"|code=="C"|code=="D"|code=="E"|code=="F" ///
	|code=="G"|code=="H"|code=="I"|code=="J"|code=="K"|code=="L" ///
	|code=="M"|code=="N"|code=="O"|code=="P"|code=="Q"
gen d1a2=code

* NOTE(review): destring discards leading zeros, so distinct codes such as
* "011" and "11" would collapse to the same key if both are present -
* confirm that d1a2 is unique in this file after conversion.
destring d1a2, replace
sort d1a2
save "ISIC rev 3.1_clean.dta", replace

* Step 2: merge the lookup onto the firm-level data (old-style merge; both
* files must be sorted on the key).
use "Enterprise surveys_clean.dta", clear
sort d1a2
merge d1a2 using "ISIC rev 3.1_clean.dta"
*codebook _merge
*browse countryname d1a2 description
* _merge==2 marks ISIC codes that appear in no firm record. Left in place,
* they become extra rows with empty countryname/year in the firm file and
* a spurious blank cell in any later collapse by country and year.
drop if _merge==2
drop _merge
save "Enterprise surveys_clean.dta", replace



****Build a dataset with one row per country-year cell:
* h7a is averaged within each (countryname, year) cell.
use "Enterprise surveys_clean.dta", clear
collapse (mean) h7a, by(countryname year)

* post = 1 for every row whose countryname equals that of the row just
* above it after sorting by country and year (i.e. any survey after the
* first one listed for that country), 0 otherwise.
sort countryname year
gen post = (countryname == countryname[_n-1])
save "Enterprise surveys_clean2.dta", replace


exit


